
library(rio)
library(ggplot2)
library(texreg)
library(tidyverse)
library(AER)
library(broom)
library(gridExtra)

##set the working directory for the replication archive
working_dir <- "~/Dropbox/dueling project/2nd_paper/replication archive/"

source(paste0(working_dir,"Auxiliary Scripts/RegressionRunner.R"))

se <- function(m) summary(m)$coef[,2]


##load in the main datasets
churches1850 <- readRDS(paste0(working_dir,"Data/churches_1850.RDS"))
churches1860 <- readRDS(paste0(working_dir,"Data/churches_1860.RDS"))
churches1850$adherents <- log(1000*(churches1850$tot_seats_churches_1850 + 1)/churches1850$tot_pop_1850)
churches1860$adherents <- log(1000*(churches1860$tot_seats_churches_1860 + 1)/churches1860$tot_pop_1860) 


churches1850 <- 
  churches1850 %>% 
  group_by(countyfips) %>% 
  arrange(decade) %>% 
  mutate(numPost_lag = lag(numPost, 1)) %>% 
  ungroup()

churches1860 <- 
  churches1860 %>% 
  group_by(countyfips) %>% 
  arrange(decade) %>% 
  mutate(numPost_lag = lag(numPost, 1)) %>% 
  ungroup()


mod_1 <- lm(adherents ~ log((numPost + 1)) + I(DVoteshare) +
                   log(all_papers1840 + 1) +
                   log(tot_pop_1850) + log(AREA_SQMI) + 
                   log(foreign_born_tot_1850 + 1) + log(tot_manuf_output_1850 + 1) +
                   log(capital_manuf_1850 + 1) + foreign_born_tot_pop_sh_1850, 
                 data = churches1850 %>% filter(decade == 1840)) 

mod_2 <- lm(adherents ~ log((numPost + 1)) + I(DVoteshare) +
              log(all_papers1840 + 1) +
              log(tot_pop_1860) + log(AREA_SQMI_1860) + 
              log(foreign_born_tot_1860 + 1) + log(tot_manuf_output_1860 + 1) +
              log(capital_manuf_1860 + 1) + foreign_born_tot_pop_sh_1860, 
            data = churches1860 %>% filter(decade == 1850))


mod_rev_1 <- lm(log((numPost + 1))  ~ adherents + log(numPost_lag + 1) + I(DVoteshare) +
                log(all_papers1840 + 1) +
                log(tot_pop_1850) + log(AREA_SQMI) + 
                log(foreign_born_tot_1850 + 1) + log(tot_manuf_output_1850 + 1) +
                log(capital_manuf_1850 + 1) + foreign_born_tot_pop_sh_1850, 
              data = churches1850 %>% filter(decade == 1850)) 

mod_rev_2 <- lm(log((numPost + 1))  ~ adherents + log(numPost_lag + 1) + I(DVoteshare) +
                  log(all_papers1840 + 1) +
                  log(tot_pop_1860) + log(AREA_SQMI_1860) + 
                  log(foreign_born_tot_1860 + 1) + log(tot_manuf_output_1860 + 1) +
                  log(capital_manuf_1860 + 1) + foreign_born_tot_pop_sh_1860, 
                data = churches1860 %>% filter(decade == 1870)) 

##now see what predicts what
sc_longdata <- 
  data.frame(
    "Outcome" = c(rep("Church Adherents", 2), rep("Post Offices", 2)),
    "Decade" = c(1850, 1860, 1860, 1870),
    "estimate" = c(coef(mod_1)[2], coef(mod_2)[2], coef(mod_rev_1)[2], coef(mod_rev_2)[2]),
    "se" = c(se(mod_1)[2], se(mod_2)[2], se(mod_rev_1)[2], se(mod_rev_2)[2])
  )

the_plot1 <- 
  ggplot(sc_longdata %>% filter(Outcome == "Post Offices"), aes(Decade, estimate)) +
  geom_point(size = 5, position=position_dodge(width=0.5)) +
  geom_errorbar(
    aes(ymin = estimate - 1.96*se, ymax = estimate + 1.96*se),
    width = 0.01,
    linetype = "solid",
    position=position_dodge(width=0.5)) +
  geom_hline(yintercept = 0, linetype = "dashed") + 
  ylab("Coefficient on Church Adherents (lagged one decade and logged)") +
  xlab("Number of Post Offices this Decade") +
  coord_flip() +
  theme_bw() +
  theme(legend.title = element_text(size = 20),
        axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20),
        legend.text = element_text(size = 20),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        strip.text.x = element_text(size = 20)) + 
  theme(legend.position="bottom") +
  scale_x_continuous(breaks = c(1860, 1870))

the_plot2 <- 
  ggplot(sc_longdata %>% filter(Outcome != "Post Offices"), aes(Decade, estimate)) +
  geom_point(size = 5, position=position_dodge(width=0.5)) +
  geom_errorbar(
    aes(ymin = estimate - 1.96*se, ymax = estimate + 1.96*se),
    width = 0.01,
    linetype = "solid",
    position=position_dodge(width=0.5)) +
  geom_hline(yintercept = 0, linetype = "dashed") + 
  ylab("Coefficient on Post Offices (lagged one decade and logged)") +
  xlab("Number of Church Adherents this Decade") +
  coord_flip() +
  theme_bw() +
  theme(legend.title = element_text(size = 20),
        axis.text.x = element_text(size = 20), axis.text.y = element_text(size = 20),
        legend.text = element_text(size = 20),
        axis.title.x = element_text(size = 20),
        axis.title.y = element_text(size = 20),
        strip.text.x = element_text(size = 20)) + 
  theme(legend.position="bottom") + 
  scale_x_continuous(breaks = c(1850, 1860))

big_plot <- grid.arrange(the_plot1,the_plot2)

ggsave(big_plot, 
       file = paste0(working_dir,"Replication Plots/social_capital_causal_ordering.pdf"), 
       width = 14, height = 10)
